This document describes the steps, code, and files used to generate the ~50kb TwinStrand sequencing panel to study the genetics of male infertility. Specifically, we are targeting DNA damage repair genes, known cancer genes, AZF genes, and genes involved in clonal spermatogenesis.
This is the script used to generate a targeted sequencing panel for the spermseq male infertility project.
Step 1: Copy the gene list from the spreadsheet and save it as a 1-column text file (target_genes.txt). Step 2: Run the script below to de-duplicate the list and copy it to the kingspeak UNIX environment.
## Move into the local target-gene-set directory
cd ~/git/spermseq/twinstrand_target_gene_set
## Sort the gene list and collapse duplicate entries
sort target_genes.txt | uniq > target_genes_v1.txt
## Upload the de-duplicated list to the kingspeak working directory
scp target_genes_v1.txt u1240855@kingspeak.chpc.utah.edu:~/spermseq/twinstrand_targets
Use the build 38 GTF file to get other identifying information for each gene_name of interest. Look only at records that are coding sequences (hence `grep -w "CDS"` in the command below).
## Define directory (on kingspeak; the same directory the gene list was copied to)
cd ~/spermseq/twinstrand_targets
## Download the Ensembl release 102 GTF file.
## wget saves the download to disk and writes no data to stdout, so it cannot be
## piped straight into grep; download first, then filter the saved file.
wget ftp://ftp.ensembl.org/pub/release-102/gtf/homo_sapiens/Homo_sapiens.GRCh38.102.gtf.gz
## Keep only coding-sequence (CDS) records
gunzip -c Homo_sapiens.GRCh38.102.gtf.gz | grep -w "CDS" > Homo_sapiens.GRCh38.102.CDS.gtf
## Copy the CDS-only GTF from kingspeak into the local data directory
## (intended to be run from the local machine)
scp u1240855@kingspeak.chpc.utah.edu:~/spermseq/twinstrand_targets/Homo_sapiens.GRCh38.102.CDS.gtf ~/git/spermseq/twinstrand_target_gene_set/data
## Get the GTF attribute strings (gene_id, transcript_id, ...) of the target genes of interest.
## For every gene symbol in target_genes_v1.txt, the first awk emits a
## `grep "gene_name \"<symbol>\""` command against the CDS GTF and bash runs it.
## Column 9 of each matching record is kept, then the sed calls rewrite it:
## `"; ` becomes `|`, any remaining `;` is removed, and ` "` becomes `=`.
## NOTE(review): the second awk only sets FS and prints $0 unchanged, so it looks like a no-op - confirm.
cat target_genes_v1.txt | awk '{print "cat Homo_sapiens.GRCh38.102.CDS.gtf | grep \"gene_name \\\""$1"\\\"\""}' | bash | cut -f 9 | awk 'BEGIN{FS = "\"; "} ; {print $0}' | sed 's/\"; /|/g' | sed 's/;//g' | sed 's/ \"/=/g' > target_gene_list_gtf_identifiers.txt
## Copy target_gene_list_gtf_identifiers.txt from kingspeak to the local directory
scp u1240855@kingspeak.chpc.utah.edu:~/spermseq/twinstrand_targets/target_gene_list_gtf_identifiers.txt ~/git/spermseq/twinstrand_target_gene_set
Use R to obtain key GTF fields for each gene: gene_name, gene_id, transcript_id, protein_id, and exon_number.
## Load in packages
library(data.table)
library(dplyr)

## Read in the file: one "key=value|key=value|..." attribute string per line.
## readLines() keeps every line intact as a single string.
ids_path <- "~/git/spermseq/twinstrand_target_gene_set/target_gene_list_gtf_identifiers.txt"
attribute_lines <- readLines(ids_path)
attribute_lines <- attribute_lines[nzchar(attribute_lines)]

## Attributes to extract, in output column order
id_fields <- c("gene_name", "gene_id", "transcript_id", "protein_id", "exon_number")

## Parse one attribute string into a one-row data.table.
## Values are looked up by key name rather than by position, so a missing or
## re-ordered attribute yields NA instead of shifting values between columns.
parse_gtf_ids <- function(attribute_line) {
  pairs <- strsplit(attribute_line, split = "|", fixed = TRUE)[[1]]
  keys <- sub("=.*$", "", pairs)
  values <- sub("^[^=]*=", "", pairs)
  ## Drop any quote characters left over from the GTF attribute syntax
  values <- gsub("\"", "", values, fixed = TRUE)
  picked <- values[match(id_fields, keys)]
  as.data.table(as.list(setNames(picked, id_fields)))
}

## Go through each row and get the gene_name, gene_id, transcript_id, protein_id, and exon_number
ids <- rbindlist(lapply(attribute_lines, parse_gtf_ids))
write.table(x = ids, file = "~/git/spermseq/twinstrand_target_gene_set/target_gene_list_ids.txt", quote = FALSE, sep = "\t", row.names = FALSE)
Obtain GTEx TPM data using gene_ids belonging to the genes of interest.
## Copy the gene list ids file to kingspeak
scp ~/git/spermseq/twinstrand_target_gene_set/target_gene_list_ids.txt u1240855@kingspeak.chpc.utah.edu:~/spermseq/twinstrand_targets
## Download master sample list to identify testis samples
cd ~/spermseq/data/GTEx/samples
wget https://storage.googleapis.com/gtex_analysis_v8/annotations/GTEx_Analysis_v8_Annotations_SampleAttributesDS.txt
## Keep the first column (sample id) of every row mentioning "testis" (case-insensitive, whole word)
grep -w -i "testis" GTEx_Analysis_v8_Annotations_SampleAttributesDS.txt | awk '{print $1}' > GTEx_Analysis_v8_Annotations_SampleAttributesDS_TestisOnlySamples_Data.txt
## Download transcript TPM data
cd ~/spermseq/data/GTEx/transcripts
wget https://storage.googleapis.com/gtex_analysis_v8/rna_seq_data/GTEx_Analysis_2017-06-05_v8_RSEMv1.3.0_transcript_tpm.gct.gz
gunzip GTEx_Analysis_2017-06-05_v8_RSEMv1.3.0_transcript_tpm.gct.gz
## Obtain GTEx transcript TPM data from genes of interest --> see "Get TPM data from GTEx testis samples"
head -n 1 GTEx_Analysis_2017-06-05_v8_RSEMv1.3.0_transcript_tpm.gct > header.temp ## Get the header which contains sample ids
## Run one grep per unique gene_id (column 2 of the ids file; the header row is dropped).
## The result is written with ">" rather than ">>" so that a testis.data.temp left behind
## by an interrupted earlier run is overwritten instead of contributing duplicate rows.
cat ~/spermseq/twinstrand_targets/target_gene_list_ids.txt | awk -v OFS='\t' '{print $1, $2}' | sort | uniq | grep -v "gene_name" | cut -f 2 | awk '{print "cat $HOME/spermseq/data/GTEx/transcripts/GTEx_Analysis_2017-06-05_v8_RSEMv1.3.0_transcript_tpm.gct | grep "$1" "}' | bash > ~/spermseq/data/GTEx/transcripts/testis.data.temp
## Prepend the header; ">" replaces any existing output file
cat header.temp testis.data.temp > GTEx_Analysis_2017-06-05_v8_RSEMv1.3.0_TESTIS_transcript_tpm.gct
## Remove temporary files
rm header.temp testis.data.temp
## Copy final file to local directory
scp u1240855@kingspeak.chpc.utah.edu:~/spermseq/data/GTEx/transcripts/GTEx_Analysis_2017-06-05_v8_RSEMv1.3.0_TESTIS_transcript_tpm.gct ~/git/spermseq/twinstrand_target_gene_set/data
## Download the APPRIS principal isoform data (GRCh38, current_release) on kingspeak
cd ~/spermseq/data/appris
wget http://apprisws.bioinfo.cnio.es/pub/current_release/datafiles/homo_sapiens/GRCh38/appris_data.principal.txt
## Copy the APPRIS file from kingspeak to the local data directory
scp u1240855@kingspeak.chpc.utah.edu:~/spermseq/data/appris/appris_data.principal.txt ~/git/spermseq/twinstrand_target_gene_set/data
[insert image]
knitr::opts_chunk$set(echo=TRUE)
######################################
##### Load in necessary packages #####
######################################
library(data.table)
library(dplyr)
library(ggplot2)
library(beeswarm)
library(ggbeeswarm)
library(reshape)
library(ggpubr)
library(biomaRt)
library(ensembldb)
library(EnsDb.Hsapiens.v86)
##################################
##### Specify file locations #####
##################################
## Input files, all located in the local data directory.
## APPRIS principal isoform table (appris_data.principal.txt)
appris_file <- "~/git/spermseq/twinstrand_target_gene_set/data/appris_data.principal.txt"
## Ensembl GRCh38 release 102 GTF restricted to CDS records
gtf_file <- "~/git/spermseq/twinstrand_target_gene_set/data/Homo_sapiens.GRCh38.102.CDS.gtf"
## GTEx v8 transcript TPM rows for the target genes, with the sample-id header line
GTEx_file <- "~/git/spermseq/twinstrand_target_gene_set/data/GTEx_Analysis_2017-06-05_v8_RSEMv1.3.0_TESTIS_transcript_tpm.gct"
## GTEx testis sample list
## NOTE(review): no step in this document creates GTEx_testis_samples.txt; presumably it is a
## renamed copy of the *_TestisOnlySamples_Data.txt file produced on kingspeak - confirm.
GTEx_samples <- "~/git/spermseq/twinstrand_target_gene_set/data/GTEx_testis_samples.txt"
#########################################
##### Specify the genes of interest #####
#########################################
## Initial candidate gene list
genes <- c("WT1", "CDC14A", "DMRT1", "KLHL10", "M1AP", "MEI1", "STAG3",
           "SYCP2", "SYCP3", "TEX11", "USP26", "PKD1", "AR", "AKT3", "APOA1",
           "APC", "ATM", "BRAF", "BRCA1", "BRCA2", "CBL", "CDKN2A", "CTNNB1", "DICER1",
           "DNMT3A", "ERCC1", "ESR1", "FANCM", "FGFR2", "FGFR3", "H3-3A", "H3-3B", "HRAS",
           "KIT", "KRAS", "LIG4", "MAP2K1", "MAP2K2", "MLH1", "MLH3", "MSH5", "NR5A1", "NF1",
           "PMS2", "PPM1D", "PRKACA", "PTCH1", "PTPN11", "RAG1", "RAF1", "RB1", "RET", "SMAD4",
           "SOS1", "STAT3", "STK11", "TEX14", "TEX15", "TSHR", "VHL", "XPA", "XRCC1")
## Genes in the "moderate" group and their Ensembl gene ids. The symbols are
## excluded from `genes` below; the ids are passed to
## gtex_transcript_tpm_manipulation() later in this document.
moderate_genes <- c("USP26", "TEX14", "SYCP2", "STAG3", "PKD1", "M1AP", "KLHL10", "DMRT1", "CDC14A")
moderate_gene_ids <- c("ENSG00000134588", "ENSG00000121101", "ENSG00000196074", "ENSG00000066923", "ENSG00000008710", "ENSG00000159374", "ENSG00000161594", "ENSG00000137090", "ENSG00000079335")
## Drop the moderate genes. A logical mask is used instead of -which(...):
## if nothing matched, genes[-integer(0)] would return an empty vector
## rather than leaving `genes` untouched.
genes <- genes[!genes %in% moderate_genes]
## Append the additional genes of interest
genes <- c(genes, "USP9Y", "DDX3Y", "PRY", "DAZ1", "DAZ2", "DAZ3", "DAZ4", "DAZL", "CDY1", "CDY1B", "RBM5")
## Build the testis GTEx TPM table unless a cached final.Rdata already exists
if (!file.exists("~/git/spermseq/script/final.Rdata")) {
  source("~/git/spermseq/script/testis_GTEx_samples.R")
  testis_GTEx_tpm <- testis_GTEx_samples(
    GTEx_file = GTEx_file,
    GTEx_samples = GTEx_samples
  )
  ## Preview the first rows
  head(testis_GTEx_tpm)
}
See APPRIS for column details. SANITY CHECK: This step checks that each gene of interest has an APPRIS isoform tag. If a gene does not, its gene_id is obtained from biomaRt using the gene_name; no transcript_ids or CCDS_id will be obtained, and the 'status' column will be flagged with a "minor" APPRIS designation.
## Collect the APPRIS isoform records for the genes of interest unless a cached
## final.Rdata already exists
if (!file.exists("~/git/spermseq/script/final.Rdata")) {
  source("~/git/spermseq/script/appris_manipulation.R")
  appris_df <- appris_manipulation(
    appris_file = appris_file,
    genes = genes
  )
  ## Preview the first rows
  head(appris_df)
}
## The following genes do not have any APPRIS isoform information:
## Attach average GTEx TPM values to the APPRIS transcripts unless a cached
## final.Rdata already exists
if (!file.exists("~/git/spermseq/script/final.Rdata")) {
  source("~/git/spermseq/script/appris_manipulation.R")
  appris_df_tpm <- get_appris_tpm(
    appris_df = appris_df,
    testis_GTEx_tpm = testis_GTEx_tpm
  )
  ## Row-bind the elements returned by get_appris_tpm() into one table,
  ## then label its columns
  appris_df_tpm <- data.table(do.call("rbind", appris_df_tpm))
  colnames(appris_df_tpm) <- c("gene_name", "gene_id", "transcript_id", "CCDS", "status", "avg_GTEx_TPM", "appris", "GTEx")
  appris_df_tpm
}
## [1] "Getting average GTEx TPM data for AKT3's transcript: ENST00000673466"
## [1] "Getting average GTEx TPM data for AKT3's transcript: ENST00000263826"
## [1] "Getting average GTEx TPM data for H3-3A's transcript: ENST00000661429"
## [1] "Getting average GTEx TPM data for H3-3A's transcript: ENST00000366815"
## [1] "Getting average GTEx TPM data for H3-3A's transcript: ENST00000366816"
## [1] "Getting average GTEx TPM data for H3-3A's transcript: ENST00000655399"
## [1] "Getting average GTEx TPM data for H3-3A's transcript: ENST00000366813"
## [1] "Getting average GTEx TPM data for H3-3A's transcript: ENST00000666609"
## [1] "Getting average GTEx TPM data for DNMT3A's transcript: ENST00000264709"
## [1] "Getting average GTEx TPM data for DNMT3A's transcript: ENST00000321117"
## [1] "Getting average GTEx TPM data for DNMT3A's transcript: ENST00000402667"
## [1] "Getting average GTEx TPM data for SOS1's transcript: ENST00000395038"
## [1] "Getting average GTEx TPM data for MLH1's transcript: ENST00000231790"
## [1] "Getting average GTEx TPM data for RBM5's transcript: ENST00000347869"
## [1] "Getting average GTEx TPM data for RAF1's transcript: ENST00000251849"
## [1] "Getting average GTEx TPM data for RAF1's transcript: ENST00000442415"
## [1] "Getting average GTEx TPM data for VHL's transcript: ENST00000256474"
## [1] "Getting average GTEx TPM data for DAZL's transcript: ENST00000399444"
## [1] "Getting average GTEx TPM data for CTNNB1's transcript: ENST00000349496"
## [1] "Getting average GTEx TPM data for CTNNB1's transcript: ENST00000645982"
## [1] "Getting average GTEx TPM data for CTNNB1's transcript: ENST00000646725"
## [1] "Getting average GTEx TPM data for CTNNB1's transcript: ENST00000645276"
## [1] "Getting average GTEx TPM data for CTNNB1's transcript: ENST00000450969"
## [1] "Getting average GTEx TPM data for CTNNB1's transcript: ENST00000645210"
## [1] "Getting average GTEx TPM data for CTNNB1's transcript: ENST00000405570"
## [1] "Getting average GTEx TPM data for CTNNB1's transcript: ENST00000643031"
## [1] "Getting average GTEx TPM data for CTNNB1's transcript: ENST00000642992"
## [1] "Getting average GTEx TPM data for CTNNB1's transcript: ENST00000642315"
## [1] "Getting average GTEx TPM data for CTNNB1's transcript: ENST00000646369"
## [1] "Getting average GTEx TPM data for CTNNB1's transcript: ENST00000647390"
## [1] "Getting average GTEx TPM data for CTNNB1's transcript: ENST00000644867"
## [1] "Getting average GTEx TPM data for CTNNB1's transcript: ENST00000643977"
## [1] "Getting average GTEx TPM data for CTNNB1's transcript: ENST00000433400"
## [1] "Getting average GTEx TPM data for CTNNB1's transcript: ENST00000645320"
## [1] "Getting average GTEx TPM data for CTNNB1's transcript: ENST00000643541"
## [1] "Getting average GTEx TPM data for CTNNB1's transcript: ENST00000396183"
## [1] "Getting average GTEx TPM data for CTNNB1's transcript: ENST00000642248"
## [1] "Getting average GTEx TPM data for CTNNB1's transcript: ENST00000643992"
## [1] "Getting average GTEx TPM data for CTNNB1's transcript: ENST00000643297"
## [1] "Getting average GTEx TPM data for CTNNB1's transcript: ENST00000396185"
## [1] "Getting average GTEx TPM data for CTNNB1's transcript: ENST00000644873"
## [1] "Getting average GTEx TPM data for CTNNB1's transcript: ENST00000431914"
## [1] "Getting average GTEx TPM data for CTNNB1's transcript: ENST00000642426"
## [1] "Getting average GTEx TPM data for CTNNB1's transcript: ENST00000441708"
## [1] "Getting average GTEx TPM data for KIT's transcript: ENST00000412167"
## [1] "Getting average GTEx TPM data for KIT's transcript: ENST00000288135"
## [1] "Getting average GTEx TPM data for FGFR3's transcript: ENST00000412135"
## [1] "Getting average GTEx TPM data for FGFR3's transcript: ENST00000440486"
## [1] "Getting average GTEx TPM data for APC's transcript: ENST00000508376"
## [1] "Getting average GTEx TPM data for APC's transcript: ENST00000257430"
## [1] "Getting average GTEx TPM data for MSH5's transcript: ENST00000375703"
## [1] "Getting average GTEx TPM data for MSH5's transcript: ENST00000375755"
## [1] "Getting average GTEx TPM data for MSH5's transcript: ENST00000375750"
## [1] "Getting average GTEx TPM data for ESR1's transcript: ENST00000206249"
## [1] "Getting average GTEx TPM data for ESR1's transcript: ENST00000338799"
## [1] "Getting average GTEx TPM data for ESR1's transcript: ENST00000440973"
## [1] "Getting average GTEx TPM data for ESR1's transcript: ENST00000443427"
## [1] "Getting average GTEx TPM data for BRAF's transcript: ENST00000288602"
## [1] "Getting average GTEx TPM data for BRAF's transcript: ENST00000644969"
## [1] "Getting average GTEx TPM data for BRAF's transcript: ENST00000646891"
## [1] "Getting average GTEx TPM data for BRAF's transcript: ENST00000496384"
## [1] "Getting average GTEx TPM data for PMS2's transcript: ENST00000265849"
## [1] "Getting average GTEx TPM data for TEX15's transcript: ENST00000643185"
## [1] "Getting average GTEx TPM data for TEX15's transcript: ENST00000638951"
## [1] "Getting average GTEx TPM data for CDKN2A's transcript: ENST00000498124"
## [1] "Getting average GTEx TPM data for CDKN2A's transcript: ENST00000304494"
## [1] "Getting average GTEx TPM data for NR5A1's transcript: ENST00000373588"
## [1] "Getting average GTEx TPM data for XPA's transcript: ENST00000375128"
## [1] "Getting average GTEx TPM data for PTCH1's transcript: ENST00000429896"
## [1] "Getting average GTEx TPM data for PTCH1's transcript: ENST00000437951"
## [1] "Getting average GTEx TPM data for PTCH1's transcript: ENST00000421141"
## [1] "Getting average GTEx TPM data for PTCH1's transcript: ENST00000375274"
## [1] "Getting average GTEx TPM data for PTCH1's transcript: ENST00000430669"
## [1] "Getting average GTEx TPM data for PTCH1's transcript: ENST00000331920"
## [1] "Getting average GTEx TPM data for PTCH1's transcript: ENST00000418258"
## [1] "Getting average GTEx TPM data for RET's transcript: ENST00000355710"
## [1] "Getting average GTEx TPM data for FGFR2's transcript: ENST00000351936"
## [1] "Getting average GTEx TPM data for FGFR2's transcript: ENST00000346997"
## [1] "Getting average GTEx TPM data for FGFR2's transcript: ENST00000358487"
## [1] "Getting average GTEx TPM data for FGFR2's transcript: ENST00000457416"
## [1] "Getting average GTEx TPM data for APOA1's transcript: ENST00000375320"
## [1] "Getting average GTEx TPM data for APOA1's transcript: ENST00000375323"
## [1] "Getting average GTEx TPM data for APOA1's transcript: ENST00000359492"
## [1] "Getting average GTEx TPM data for APOA1's transcript: ENST00000236850"
## [1] "Getting average GTEx TPM data for CBL's transcript: ENST00000634840"
## [1] "Getting average GTEx TPM data for CBL's transcript: ENST00000264033"
## [1] "Getting average GTEx TPM data for HRAS's transcript: ENST00000451590"
## [1] "Getting average GTEx TPM data for HRAS's transcript: ENST00000311189"
## [1] "Getting average GTEx TPM data for HRAS's transcript: ENST00000397596"
## [1] "Getting average GTEx TPM data for WT1's transcript: ENST00000332351"
## [1] "Getting average GTEx TPM data for WT1's transcript: ENST00000639563"
## [1] "Getting average GTEx TPM data for RAG1's transcript: ENST00000299440"
## [1] "Getting average GTEx TPM data for ATM's transcript: ENST00000675843"
## [1] "Getting average GTEx TPM data for ATM's transcript: ENST00000452508"
## [1] "Getting average GTEx TPM data for ATM's transcript: ENST00000278616"
## [1] "Getting average GTEx TPM data for SYCP3's transcript: ENST00000392927"
## [1] "Getting average GTEx TPM data for SYCP3's transcript: ENST00000266743"
## [1] "Getting average GTEx TPM data for SYCP3's transcript: ENST00000392924"
## [1] "Getting average GTEx TPM data for PTPN11's transcript: ENST00000635625"
## [1] "Getting average GTEx TPM data for PTPN11's transcript: ENST00000351677"
## [1] "Getting average GTEx TPM data for KRAS's transcript: ENST00000311936"
## [1] "Getting average GTEx TPM data for KRAS's transcript: ENST00000256078"
## [1] "Getting average GTEx TPM data for RB1's transcript: ENST00000267163"
## [1] "Getting average GTEx TPM data for LIG4's transcript: ENST00000611712"
## [1] "Getting average GTEx TPM data for LIG4's transcript: ENST00000405925"
## [1] "Getting average GTEx TPM data for LIG4's transcript: ENST00000356922"
## [1] "Getting average GTEx TPM data for LIG4's transcript: ENST00000442234"
## [1] "Getting average GTEx TPM data for BRCA2's transcript: ENST00000544455"
## [1] "Getting average GTEx TPM data for BRCA2's transcript: ENST00000380152"
## [1] "Getting average GTEx TPM data for FANCM's transcript: ENST00000267430"
## [1] "Getting average GTEx TPM data for DICER1's transcript: ENST00000527414"
## [1] "Getting average GTEx TPM data for DICER1's transcript: ENST00000343455"
## [1] "Getting average GTEx TPM data for DICER1's transcript: ENST00000393063"
## [1] "Getting average GTEx TPM data for DICER1's transcript: ENST00000526495"
## [1] "Getting average GTEx TPM data for MLH3's transcript: ENST00000355774"
## [1] "Getting average GTEx TPM data for TSHR's transcript: ENST00000298171"
## [1] "Getting average GTEx TPM data for TSHR's transcript: ENST00000541158"
## [1] "Getting average GTEx TPM data for MAP2K1's transcript: ENST00000307102"
## [1] "Getting average GTEx TPM data for H3-3B's transcript: ENST00000589599"
## [1] "Getting average GTEx TPM data for H3-3B's transcript: ENST00000587560"
## [1] "Getting average GTEx TPM data for H3-3B's transcript: ENST00000586607"
## [1] "Getting average GTEx TPM data for H3-3B's transcript: ENST00000254810"
## [1] "Getting average GTEx TPM data for BRCA1's transcript: ENST00000357654"
## [1] "Getting average GTEx TPM data for BRCA1's transcript: ENST00000471181"
## [1] "Getting average GTEx TPM data for NF1's transcript: ENST00000358273"
## [1] "Getting average GTEx TPM data for NF1's transcript: ENST00000356175"
## [1] "Getting average GTEx TPM data for PPM1D's transcript: ENST00000305921"
## [1] "Getting average GTEx TPM data for STAT3's transcript: ENST00000404395"
## [1] "Getting average GTEx TPM data for STAT3's transcript: ENST00000588969"
## [1] "Getting average GTEx TPM data for STAT3's transcript: ENST00000264657"
## [1] "Getting average GTEx TPM data for SMAD4's transcript: ENST00000342988"
## [1] "Getting average GTEx TPM data for SMAD4's transcript: ENST00000398417"
## [1] "Getting average GTEx TPM data for PRKACA's transcript: ENST00000308677"
## [1] "Getting average GTEx TPM data for MAP2K2's transcript: ENST00000262948"
## [1] "Getting average GTEx TPM data for ERCC1's transcript: ENST00000300853"
## [1] "Getting average GTEx TPM data for ERCC1's transcript: ENST00000589165"
## [1] "Getting average GTEx TPM data for STK11's transcript: ENST00000326873"
## [1] "Getting average GTEx TPM data for STK11's transcript: ENST00000652231"
## [1] "Getting average GTEx TPM data for STK11's transcript: ENST00000586243"
## [1] "Getting average GTEx TPM data for XRCC1's transcript: ENST00000262887"
## [1] "Getting average GTEx TPM data for MEI1's transcript: ENST00000401548"
## [1] "Getting average GTEx TPM data for TEX11's transcript: ENST00000374333"
## [1] "Getting average GTEx TPM data for TEX11's transcript: ENST00000395889"
## [1] "Getting average GTEx TPM data for TEX11's transcript: ENST00000344304"
## [1] "Getting average GTEx TPM data for AR's transcript: ENST00000374690"
## [1] "Getting average GTEx TPM data for CDY1's transcript: ENST00000361963"
## [1] "Getting average GTEx TPM data for CDY1's transcript: ENST00000306609"
## [1] "Getting average GTEx TPM data for DAZ3's transcript: ENST00000382365"
## [1] "Getting average GTEx TPM data for DAZ3's transcript: ENST00000446723"
## [1] "Getting average GTEx TPM data for DAZ3's transcript: ENST00000315357"
## [1] "Getting average GTEx TPM data for DDX3Y's transcript: ENST00000336079"
## [1] "Getting average GTEx TPM data for DDX3Y's transcript: ENST00000360160"
## [1] "Getting average GTEx TPM data for DAZ2's transcript: ENST00000382440"
## [1] "Getting average GTEx TPM data for DAZ2's transcript: ENST00000382431"
## [1] "Getting average GTEx TPM data for DAZ2's transcript: ENST00000382294"
## [1] "Getting average GTEx TPM data for DAZ2's transcript: ENST00000400493"
## [1] "Getting average GTEx TPM data for DAZ2's transcript: ENST00000382306"
## [1] "Getting average GTEx TPM data for DAZ2's transcript: ENST00000382449"
## [1] "Getting average GTEx TPM data for DAZ2's transcript: ENST00000382424"
## [1] "Getting average GTEx TPM data for DAZ2's transcript: ENST00000449947"
## [1] "Getting average GTEx TPM data for DAZ2's transcript: ENST00000382433"
## [1] "Getting average GTEx TPM data for USP9Y's transcript: ENST00000651177"
## [1] "Getting average GTEx TPM data for USP9Y's transcript: ENST00000338981"
## [1] "Getting average GTEx TPM data for CDY1B's transcript: ENST00000382407"
## [1] "Getting average GTEx TPM data for CDY1B's transcript: ENST00000306882"
## [1] "Getting average GTEx TPM data for DAZ4's transcript: ENST00000382314"
## [1] "Getting average GTEx TPM data for DAZ4's transcript: ENST00000634662"
## [1] "Getting average GTEx TPM data for DAZ4's transcript: ENST00000382296"
## [1] "Getting average GTEx TPM data for DAZ1's transcript: ENST00000405239"
## [1] "Getting average GTEx TPM data for DAZ1's transcript: ENST00000382510"
## [1] "Getting average GTEx TPM data for PRY's transcript: ENST00000303728"
These are transcripts that are not identified as PRINCIPAL isoforms by APPRIS, but whose avg TPM across all testis samples is greater than the minimum avg TPM of the APPRIS-defined PRINCIPAL isoforms.
NOTE: The GTEx-only transcript must have an avg TPM > 1 across all testis samples to be included in the dataset
## Combine the APPRIS transcripts with GTEx-only transcripts unless a cached
## final.Rdata already exists
if (!file.exists("~/git/spermseq/script/final.Rdata")) {
  source("~/git/spermseq/script/gtex_transcript_tpm_manipulation.R")
  appris_gtex_transcript_tpm <- gtex_transcript_tpm_manipulation(
    appris_df_tpm = appris_df_tpm,
    testis_GTEx_tpm = testis_GTEx_tpm,
    moderate_gene_ids = moderate_gene_ids
  )
  appris_gtex_transcript_tpm
}
## [1] "ENSG00000117020"
## [1] "The GTEx-only-transcript, ENST00000336199 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000366539 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000366540 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000463991 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000490018 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000491219 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000492957 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000550388 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000552631 did not meet the threshold avg TPM cutoffs..."
## [1] "ENSG00000134982"
## [1] "The GTEx-only-transcript, ENST00000502371 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000504915 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000505084 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000505350 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000507379 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000508624 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000509732 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000512211 did not meet the threshold avg TPM cutoffs..."
## [1] "ENSG00000118137"
## [1] "The GTEx-only-transcript, ENST00000375329 did not meet the threshold avg TPM cutoffs..."
## [1] "ENSG00000169083"
## [1] "The GTEx-only-transcript, ENST00000396043 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000396044 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000504326 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000513847 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000514029 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000612010 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000612452 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000613054 did not meet the threshold avg TPM cutoffs..."
## [1] "ENSG00000149311"
## [1] "The GTEx-only-transcript, ENST00000419286 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000524792 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000525012 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000525056 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000525178 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000525537 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000526567 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000527389 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000527805 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000527891 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000529588 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000530958 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000531525 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000531957 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000532765 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000532931 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000533526 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000533690 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000533733 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000533979 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000534625 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000601453 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000638443 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000639240 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000639953 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000640388 did not meet the threshold avg TPM cutoffs..."
## [1] "ENSG00000157764"
## [1] "The GTEx-only-transcript, ENST00000469930 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000479537 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000497784 did not meet the threshold avg TPM cutoffs..."
## [1] "ENSG00000012048"
## [1] "The GTEx-only-transcript, ENST00000354071 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000412061 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000461221 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000461798 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000468300 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000470026 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000472490 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000473961 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000476777 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000477152 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000478531 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000484087 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000487825 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000489037 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000491747 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000492859 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000493795 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000493919 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000494123 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000497488 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000586385 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000591534 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000591849 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000618469 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000621897 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000634433 did not meet the threshold avg TPM cutoffs..."
## [1] "ENSG00000139618"
## [1] "The GTEx-only-transcript, ENST00000470094 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000528762 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000530893 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000614259 did not meet the threshold avg TPM cutoffs..."
## [1] "ENSG00000110395"
## [1] "The GTEx-only-transcript, ENST00000634301 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000637974 did not meet the threshold avg TPM cutoffs..."
## [1] "ENSG00000147889"
## [1] "The GTEx-only-transcript, ENST00000380151 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000494262 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000498628 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000530628 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000579122 did not meet the threshold avg TPM cutoffs..."
## [1] "ENSG00000172352"
## [1] "ENSG00000172288"
## [1] "ENSG00000168036"
## [1] "The GTEx-only-transcript, ENST00000426215 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000465552 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000471014 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000488914 did not meet the threshold avg TPM cutoffs..."
## [1] "ENSG00000188120"
## [1] "The GTEx-only-transcript, ENST00000426000 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000540248 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000620725 did not meet the threshold avg TPM cutoffs..."
## [1] "ENSG00000205944"
## [1] "ENSG00000187191"
## [1] "ENSG00000205916"
## [1] "The GTEx-only-transcript, ENST00000382432 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000415508 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000440066 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000449750 did not meet the threshold avg TPM cutoffs..."
## [1] "ENSG00000092345"
## [1] "The GTEx-only-transcript, ENST00000250863 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000454457 did not meet the threshold avg TPM cutoffs..."
## [1] "ENSG00000067048"
## [1] "The GTEx-only-transcript, ENST00000440554 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000454054 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000463199 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000469101 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000472510 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000493363 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000495478 did not meet the threshold avg TPM cutoffs..."
## [1] "ENSG00000100697"
## [1] "The GTEx-only-transcript, ENST00000529206 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000529720 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000531162 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000532458 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000541352 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000554367 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000556681 did not meet the threshold avg TPM cutoffs..."
## [1] "ENSG00000119772"
## [1] "The GTEx-only-transcript, ENST00000380756 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000406659 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000461228 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000466601 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000470983 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000474807 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000474887 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000482935 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000484184 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000491288 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000496570 did not meet the threshold avg TPM cutoffs..."
## [1] "ENSG00000012061"
## [1] "The GTEx-only-transcript, ENST00000588300 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000588738 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000589214 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000589381 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000590701 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000591636 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000592023 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000592083 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000592410 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000592444 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000592905 did not meet the threshold avg TPM cutoffs..."
## [1] "ENSG00000091831"
## [1] "The GTEx-only-transcript, ENST00000404742 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000406599 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000415488 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000427531 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000446550 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000456483 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000473497 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000482101 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000488573 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000638569 did not meet the threshold avg TPM cutoffs..."
## [1] "ENSG00000187790"
## [1] "The GTEx-only-transcript, ENST00000542564 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000554030 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000555013 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000555484 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000556036 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000556250 did not meet the threshold avg TPM cutoffs..."
## [1] "ENSG00000066468"
## [1] "The GTEx-only-transcript, ENST00000336553 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000356226 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000357555 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000359354 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000360144 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000369056 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000369058 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000369059 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000369060 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000429361 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000463870 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000467584 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000478859 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000490349 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000491111 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000491475 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000604236 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000611527 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000636922 did not meet the threshold avg TPM cutoffs..."
## [1] "ENSG00000068078"
## [1] "The GTEx-only-transcript, ENST00000340107 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000474521 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000507588 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000613647 did not meet the threshold avg TPM cutoffs..."
## [1] "ENSG00000163041"
## [1] "ENSG00000132475"
## [1] "The GTEx-only-transcript, ENST00000586518 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000587171 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000589949 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000591893 did not meet the threshold avg TPM cutoffs..."
## [1] "ENSG00000174775"
## [1] "The GTEx-only-transcript, ENST00000417302 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000478324 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000482021 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000493230 did not meet the threshold avg TPM cutoffs..."
## [1] "ENSG00000157404"
## [1] "The GTEx-only-transcript, ENST00000512959 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000514582 did not meet the threshold avg TPM cutoffs..."
## [1] "ENSG00000133703"
## [1] "The GTEx-only-transcript, ENST00000556131 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000557334 did not meet the threshold avg TPM cutoffs..."
## [1] "ENSG00000174405"
## [1] "The GTEx-only-transcript, ENST00000614526 did not meet the threshold avg TPM cutoffs..."
## [1] "ENSG00000169032"
## [1] "The GTEx-only-transcript, ENST00000425818 did not meet the threshold avg TPM cutoffs..."
## [1] "ENSG00000126934"
## [1] "The GTEx-only-transcript, ENST00000593364 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000595715 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000597008 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000597263 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000599021 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000599345 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000600584 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000601786 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000602167 did not meet the threshold avg TPM cutoffs..."
## [1] "ENSG00000167077"
## [1] "The GTEx-only-transcript, ENST00000403492 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000460702 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000462246 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000462450 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000473736 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000476614 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000476893 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000482055 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000484966 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000487535 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000492484 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000498456 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000540833 did not meet the threshold avg TPM cutoffs..."
## [1] "ENSG00000076242"
## [1] "The GTEx-only-transcript, ENST00000413212 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000413740 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000429117 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000432299 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000435176 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000441265 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000442249 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000447829 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000450420 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000454028 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000455445 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000456676 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000457004 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000458009 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000458205 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000466900 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000476172 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000485889 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000492474 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000536378 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000539477 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000616768 did not meet the threshold avg TPM cutoffs..."
## [1] "ENSG00000119684"
## [1] "The GTEx-only-transcript, ENST00000553263 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000555144 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000555499 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000555671 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000556257 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000556453 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000557648 did not meet the threshold avg TPM cutoffs..."
## [1] "ENSG00000204410"
## [1] "The GTEx-only-transcript, ENST00000375740 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000423982 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000450148 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000463094 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000468136 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000468602 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000482280 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000484309 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000497269 did not meet the threshold avg TPM cutoffs..."
## [1] "ENSG00000196712"
## [1] "The GTEx-only-transcript, ENST00000422121 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000431387 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000456735 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000466819 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000468273 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000479536 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000479614 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000487476 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000488981 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000489712 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000490416 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000493220 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000495910 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000577967 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000579081 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000581113 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000581790 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000582892 did not meet the threshold avg TPM cutoffs..."
## [1] "ENSG00000136931"
## [1] "The GTEx-only-transcript, ENST00000373587 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000455734 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000620110 did not meet the threshold avg TPM cutoffs..."
## [1] "ENSG00000122512"
## [1] "The GTEx-only-transcript, ENST00000380416 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000382321 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000406569 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000415839 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000441476 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000469652 did not meet the threshold avg TPM cutoffs..."
## [1] "ENSG00000170836"
## [1] "The GTEx-only-transcript, ENST00000590418 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000629650 did not meet the threshold avg TPM cutoffs..."
## [1] "ENSG00000072062"
## [1] "The GTEx-only-transcript, ENST00000350356 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000536649 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000587372 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000587533 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000588209 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000589284 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000589994 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000590853 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000593092 did not meet the threshold avg TPM cutoffs..."
## [1] "ENSG00000169789"
## [1] "The GTEx-only-transcript, ENST00000477123 did not meet the threshold avg TPM cutoffs..."
## [1] "ENSG00000185920"
## [1] "The GTEx-only-transcript, ENST00000375271 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000468211 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000488809 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000546820 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000547615 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000547672 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000548379 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000548420 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000548945 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000550136 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000550914 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000551425 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000551623 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000551845 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000553011 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000553256 did not meet the threshold avg TPM cutoffs..."
## [1] "ENSG00000179295"
## [1] "The GTEx-only-transcript, ENST00000392597 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000530818 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000531326 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000635652 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000639857 did not meet the threshold avg TPM cutoffs..."
## [1] "ENSG00000132155"
## [1] "The GTEx-only-transcript, ENST00000416093 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000423275 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000432427 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000460610 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000491290 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000492690 did not meet the threshold avg TPM cutoffs..."
## [1] "ENSG00000166349"
## [1] "The GTEx-only-transcript, ENST00000534663 did not meet the threshold avg TPM cutoffs..."
## [1] "ENSG00000139687"
## [1] "The GTEx-only-transcript, ENST00000467505 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000480491 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000484879 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000525036 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000531171 did not meet the threshold avg TPM cutoffs..."
## [1] "ENSG00000003756"
## [1] "The GTEx-only-transcript, ENST00000395174 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000404526 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000417905 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000433556 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000437500 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000438369 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000441305 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000441812 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000461242 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000462025 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000464087 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000464988 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000469838 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000471995 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000474470 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000474818 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000475128 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000475590 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000479275 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000489437 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000492430 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000492472 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000493993 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000494360 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000496179 did not meet the threshold avg TPM cutoffs..."
## [1] "ENSG00000165731"
## [1] "The GTEx-only-transcript, ENST00000340058 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000479913 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000498820 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000615310 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000640619 did not meet the threshold avg TPM cutoffs..."
## [1] "ENSG00000141646"
## [1] "The GTEx-only-transcript, ENST00000585448 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000586253 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000588745 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000588860 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000589076 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000589706 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000589941 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000590061 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000590499 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000591914 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000592186 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000593223 did not meet the threshold avg TPM cutoffs..."
## [1] "ENSG00000115904"
## [1] "The GTEx-only-transcript, ENST00000402219 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000451331 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000461545 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000472480 did not meet the threshold avg TPM cutoffs..."
## [1] "ENSG00000168610"
## [1] "The GTEx-only-transcript, ENST00000389272 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000462286 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000478276 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000498330 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000585360 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000585517 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000588065 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000590776 did not meet the threshold avg TPM cutoffs..."
## [1] "ENSG00000118046"
## [1] "The GTEx-only-transcript, ENST00000585465 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000585748 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000585851 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000586358 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000589152 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000591133 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000593219 did not meet the threshold avg TPM cutoffs..."
## [1] "ENSG00000139351"
## [1] "The GTEx-only-transcript, ENST00000478139 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000478238 did not meet the threshold avg TPM cutoffs..."
## [1] "ENSG00000120498"
## [1] "ENSG00000133863"
## [1] "The GTEx-only-transcript, ENST00000518257 did not meet the threshold avg TPM cutoffs..."
## [1] "ENSG00000165409"
## [1] "The GTEx-only-transcript, ENST00000342443 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000553763 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000554263 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000554435 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000555326 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000556031 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000557096 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000636454 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000637447 did not meet the threshold avg TPM cutoffs..."
## [1] "ENSG00000114374"
## [1] "The GTEx-only-transcript, ENST00000493168 did not meet the threshold avg TPM cutoffs..."
## [1] "ENSG00000134086"
## [1] "The GTEx-only-transcript, ENST00000477538 did not meet the threshold avg TPM cutoffs..."
## [1] "ENSG00000184937"
## [1] "The GTEx-only-transcript, ENST00000448076 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000452863 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000527775 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000527882 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000639907 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000640146 did not meet the threshold avg TPM cutoffs..."
## [1] "ENSG00000136936"
## [1] "The GTEx-only-transcript, ENST00000462523 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000485042 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000496104 did not meet the threshold avg TPM cutoffs..."
## [1] "ENSG00000073050"
## [1] "The GTEx-only-transcript, ENST00000594107 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000594511 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000595789 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000597811 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000598165 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000598422 did not meet the threshold avg TPM cutoffs..."
## [1] "The GTEx-only-transcript, ENST00000599693 did not meet the threshold avg TPM cutoffs..."
## Build the per-exon GTF table, or restore it from cache.
## The whole step is skipped once final.Rdata exists.
if (!file.exists("~/git/spermseq/script/final.Rdata")) {
  if (file.exists("~/git/spermseq/script/gtf_df.Rdata")) {
    ## This is the longest step, so reuse the cached workspace when present
    load("~/git/spermseq/script/gtf_df.Rdata")
  } else {
    ## Read the GTF, keeping columns 1, 4, 5 and 9 only
    gtf <- fread(gtf_file, header = FALSE)[, c(1, 4, 5, 9)]
    colnames(gtf) <- c("chr", "start", "stop", "info")
    ## Get exon positions for the selected transcripts
    source("~/git/spermseq/script/gtf_manipulation.R")
    gtf_df <- gtf_manipulation(gtf = gtf, df = appris_gtex_transcript_tpm)
    gtf_df
    ## Cache the entire workspace so the next run can skip this branch
    save.image("~/git/spermseq/script/gtf_df.Rdata")
    ## Write output to a tab-separated text file
    write.table(
      x = gtf_df,
      file = "~/git/spermseq/twinstrand_target_gene_set/output/appris_gtex_gtf_testis.txt",
      quote = FALSE,
      sep = "\t",
      row.names = FALSE
    )
  }
}
## Show rows of `gtf` where transcript_biotype is not "protein_coding", appris == 0 and GTEx == 1
## (presumably non-coding transcripts that were selected from GTEx only).
## NOTE(review): `gtf` is created above with just the columns chr/start/stop/info, so the three
## columns used here must come from elsewhere (gtf_manipulation() or the cached image) -- confirm
## that `gtf`, rather than `gtf_df`, is the intended table.
gtf[which(gtf$transcript_biotype != "protein_coding" & gtf$appris == 0 & gtf$GTEx == 1)]
# Spot-check the rows retained for a handful of genes: "PPM1D" "PRKACA" "RB1" "DDX3Y" "RBM5" "MLH1"
gtf_df[gene_name %in% c("PPM1D", "PRKACA", "RB1", "DDX3Y", "RBM5", "MLH1")]
Schematic depicting how different exons across isoforms of a gene are merged.
## Annotate the exon table with Pfam information via get_pfam(), or restore the cached result.
## The whole step is skipped once final.Rdata exists.
if (!file.exists("~/git/spermseq/script/final.Rdata")) {
  if (file.exists("~/git/spermseq/script/pfam_df.Rdata")) {
    ## Pfam data already exists: reuse the cached workspace
    load("~/git/spermseq/script/pfam_df.Rdata")
  } else {
    ## Start from a clean log for the pfam domain exon analysis
    if (file.exists("~/git/spermseq/script/get_pfam_log.txt")) {
      file.remove("~/git/spermseq/script/get_pfam_log.txt")
    }
    ## Capture both regular output and messages in the log file
    sink_file <- file("~/git/spermseq/script/get_pfam_log.txt", open = "wt")
    sink(sink_file)
    sink(sink_file, type = "message")
    tryCatch(
      {
        ## Run the function
        source("~/git/spermseq/script/get_pfam.R")
        pfam_df <- get_pfam(df = gtf_df)
        ## Output the data
        pfam_df
        ## Save image
        save.image("~/git/spermseq/script/pfam_df.Rdata")
      },
      finally = {
        ## Always revert output back to the console, even when get_pfam() fails;
        ## otherwise every later message and result would silently go to the log.
        sink(type = "message")
        sink()
        ## sink() does not close a user-opened connection, so close it explicitly
        close(sink_file)
      }
    )
  }
}
## Warning: Could not find a CDS whith the expected length for protein:
## 'ENSP00000450632'. The returned genomic coordinates might thus not be correct
## for this protein.
## Warning: Could not find a CDS whith the expected length for protein:
## 'ENSP00000481464'. The returned genomic coordinates might thus not be correct
## for this protein.
## Warning: Could not find a CDS whith the expected length for protein:
## 'ENSP00000491912'. The returned genomic coordinates might thus not be correct
## for this protein.
## Warning: Could not find a CDS whith the expected length for protein:
## 'ENSP00000465403'. The returned genomic coordinates might thus not be correct
## for this protein.
## Warning: Could not find a CDS whith the expected length for protein:
## 'ENSP00000451130'. The returned genomic coordinates might thus not be correct
## for this protein.
## Warning: Could not find a CDS whith the expected length for protein:
## 'ENSP00000491505'. The returned genomic coordinates might thus not be correct
## for this protein.
## Warning: Could not find a CDS whith the expected length for protein:
## 'ENSP00000406876'. The returned genomic coordinates might thus not be correct
## for this protein.
## Compare the genes present in pfam_df with the requested gene list
wtf <- unique(pfam_df$gene_name)
if (length(genes) != length(wtf)) {
  ## Requested genes that are absent from pfam_df, and their rows in gtf_df
  wtf_genes <- genes[!genes %in% wtf]
  wtf_genes
  gtf_df[gene_name %in% wtf_genes]
}
## Spot-check the pfam_df rows of a handful of genes
pfam_df[gene_name %in% c("PPM1D", "PRKACA", "RB1", "DDX3Y", "RBM5", "MLH1")]
Schematic depicting how different exons across isoforms of a gene are merged.
## Merge exons with merge_exons(), size the panel, filter, and write the result.
## The whole step is skipped once final.Rdata exists.
if (!file.exists("~/git/spermseq/script/final.Rdata")) {
  source("~/git/spermseq/script/merge_exons.R")
  merged_exons <- merge_exons(df = pfam_df)

  ## Total (exonic) panel space: sum of all merged exon sizes
  total <- sum(merged_exons$exon_size)

  ## Percentage of the panel space taken up by each gene
  merged_exons$proportion_total <- merged_exons$total_nucleotides / total * 100

  ## Remove exons found below threshold of total proportion of transcripts for a given gene
  source("~/git/spermseq/script/transcript_proportion_filter.R")
  merged_exons <- transcript_proportion_filter(merged_exons = merged_exons)

  ## Write the output file (tab-separated)
  write.table(
    x = merged_exons,
    file = "~/git/spermseq/twinstrand_target_gene_set/output/merged_exons.txt",
    quote = FALSE,
    sep = "\t",
    row.names = FALSE
  )
}
## [1] "ENSG00000117020" "AKT3"
## [1] "ENSG00000134982" "APC"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "ENSG00000118137" "APOA1"
## [1] "situation 5"
## [1] "situation 5"
## [1] "ENSG00000169083" "AR"
## [1] "ENSG00000149311" "ATM"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "ENSG00000157764" "BRAF"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 0"
## [1] "situation 3"
## [1] "situation 5"
## [1] "ENSG00000012048" "BRCA1"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 2"
## [1] "situation 4"
## [1] "17:43076537-43076611"
## [1] "t: 3"
## [1] "r: 4"
## [1] "length: 1"
## [1] "17:43076488-43076614 | 17:43076488-43076614 - o"
## [1] "ENSG00000139618" "BRCA2"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "ENSG00000110395" "CBL"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "ENSG00000147889" "CDKN2A"
## [1] "situation 5"
## [1] "situation 2"
## [1] "situation 5"
## [1] "situation 2"
## [1] "situation 5"
## [1] "situation 3"
## [1] "ENSG00000172352" "CDY1B"
## [1] "situation 0"
## [1] "ENSG00000172288" "CDY1"
## [1] "situation 0"
## [1] "ENSG00000168036" "CTNNB1"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 0"
## [1] "situation 2"
## [1] "situation 3"
## [1] "situation 4"
## [1] "3:41224534-41224753"
## [1] "t: 26"
## [1] "r: 1"
## [1] "length: 1"
## [1] "3:41224526-41224759 - s0 | 3:41224526-41224753 - s2 | 3:41224526-41224753 - o | 3:41224526-41224753 - o | 3:41224526-41224753 - o | 3:41224526-41224753 - o | 3:41224526-41224753 - o | 3:41224526-41224753 - o | 3:41224526-41224753 - o | 3:41224526-41224753 - o | 3:41224526-41224753 - o | 3:41224526-41224753 - o | 3:41224526-41224753 - o | 3:41224526-41224753 - o | 3:41224526-41224753 - o | 3:41224526-41224753 - o | 3:41224526-41224753 - o | 3:41224526-41224753 - o | 3:41224526-41224753 - o | 3:41224526-41224753 - o | 3:41224526-41224753 - o | 3:41224526-41224753 - o | 3:41224526-41224753 - o"
## [1] "ENSG00000188120" "DAZ1"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 3"
## [1] "situation 5"
## [1] "ENSG00000205944" "DAZ2"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "ENSG00000187191" "DAZ3"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "ENSG00000205916" "DAZ4"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "ENSG00000092345" "DAZL"
## [1] "ENSG00000067048" "DDX3Y"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "ENSG00000100697" "DICER1"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 0"
## [1] "ENSG00000119772" "DNMT3A"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 2"
## [1] "situation 5"
## [1] "situation 5"
## [1] "ENSG00000012061" "ERCC1"
## [1] "situation 5"
## [1] "situation 0"
## [1] "situation 3"
## [1] "ENSG00000091831" "ESR1"
## [1] "ENSG00000187790" "FANCM"
## [1] "situation 3"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "ENSG00000066468" "FGFR2"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 0"
## [1] "situation 5"
## [1] "situation 2"
## [1] "situation 0"
## [1] "situation 2"
## [1] "situation 5"
## [1] "ENSG00000068078" "FGFR3"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 0"
## [1] "situation 0"
## [1] "ENSG00000163041" "H3-3A"
## [1] "situation 0"
## [1] "ENSG00000132475" "H3-3B"
## [1] "situation 3"
## [1] "situation 3"
## [1] "situation 3"
## [1] "situation 3"
## [1] "situation 0"
## [1] "ENSG00000174775" "HRAS"
## [1] "situation 5"
## [1] "ENSG00000157404" "KIT"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "ENSG00000133703" "KRAS"
## [1] "situation 5"
## [1] "ENSG00000174405" "LIG4"
## [1] "ENSG00000169032" "MAP2K1"
## [1] "situation 5"
## [1] "ENSG00000126934" "MAP2K2"
## [1] "situation 2"
## [1] "ENSG00000167077" "MEI1"
## [1] "situation 4"
## [1] "22:41795422-41795452"
## [1] "t: 1"
## [1] "r: 1"
## [1] "length: 1"
## [1] "22:41795411-41795542 - o"
## [1] "ENSG00000076242" "MLH1"
## [1] "ENSG00000119684" "MLH3"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 2"
## [1] "ENSG00000204410" "MSH5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 3"
## [1] "situation 5"
## [1] "situation 2"
## [1] "situation 3"
## [1] "situation 3"
## [1] "ENSG00000196712" "NF1"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "ENSG00000136931" "NR5A1"
## [1] "ENSG00000122512" "PMS2"
## [1] "ENSG00000170836" "PPM1D"
## [1] "ENSG00000072062" "PRKACA"
## [1] "ENSG00000169789" "PRY"
## [1] "ENSG00000185920" "PTCH1"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 0"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 2"
## [1] "situation 3"
## [1] "ENSG00000179295" "PTPN11"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 3"
## [1] "situation 5"
## [1] "situation 5"
## [1] "ENSG00000132155" "RAF1"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "ENSG00000166349" "RAG1"
## [1] "ENSG00000139687" "RB1"
## [1] "ENSG00000003756" "RBM5"
## [1] "ENSG00000165731" "RET"
## [1] "situation 3"
## [1] "situation 2"
## [1] "ENSG00000141646" "SMAD4"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "ENSG00000115904" "SOS1"
## [1] "ENSG00000168610" "STAT3"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 0"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "ENSG00000118046" "STK11"
## [1] "situation 5"
## [1] "situation 5"
## [1] "ENSG00000139351" "SYCP3"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "ENSG00000120498" "TEX11"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 2"
## [1] "ENSG00000133863" "TEX15"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 2"
## [1] "ENSG00000165409" "TSHR"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "ENSG00000114374" "USP9Y"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 3"
## [1] "ENSG00000134086" "VHL"
## [1] "ENSG00000184937" "WT1"
## [1] "situation 5"
## [1] "situation 0"
## [1] "ENSG00000136936" "XPA"
## [1] "ENSG00000073050" "XRCC1"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## [1] "situation 5"
## Quick checks: the CDY1 rows and the distribution of merged exon sizes
merged_exons[gene_name == "CDY1"]
summary(merged_exons$exon_size)

## One x-axis label per gene, e.g. "GENE (n = <total_exons> exons; avg = <avg_exon_size>)"
merged_exons$x_axis <- with(
  merged_exons,
  paste0(gene_name, " (n = ", total_exons, " exons; avg = ", avg_exon_size, ")")
)
## Factor the labels so the x-axis is ordered by total_nucleotides, largest first
merged_exons$x_axis <- factor(
  merged_exons$x_axis,
  levels = unique(merged_exons$x_axis[order(merged_exons$total_nucleotides, decreasing = TRUE)])
)

## Scatter plot: exon size (y, log10 scale) per gene (x), coloured by transcript_num
p1 <- ggplot(data = merged_exons) +
  geom_point(aes(x = x_axis, y = exon_size, color = transcript_num)) +
  theme(
    axis.ticks.x = element_blank(),
    axis.text.x = element_text(angle = 75, hjust = 1)
  ) +
  labs(x = "Gene", y = "Exon Size", color = "# of Transcripts") +
  scale_y_log10() +
  scale_color_gradient(low = "grey", high = "red")

## Line plot: proportion of panel space taken up by each gene, annotated with the panel total
p2 <- ggplot(data = merged_exons) +
  geom_line(aes(x = x_axis, y = proportion_total, group = 1), color = "Blue") +
  theme(
    axis.ticks.x = element_blank(),
    axis.text.x = element_blank(),
    axis.title.x = element_blank()
  ) +
  labs(y = "Proportion of\nTotal (%)\n") +
  annotate(
    geom = "text", x = 40, y = 5,
    label = paste0("Total = ", sum(merged_exons$exon_size), " nucleotides")
  ) +
  geom_vline(xintercept = 9.5, linetype = 2, color = "red")

## Stack the proportion panel on top of the scatter plot with a shared legend
ggarrange(
  p2, p1,
  nrow = 2, ncol = 1,
  common.legend = TRUE, legend = "right",
  heights = c(0.5, 2)
)
## Download [3D Hotspots](http://www.3dhotspots.org/#/home) dataset and manually save as a txt file
wget http://www.3dhotspots.org/files/3d_hotspots.xls
## Download [Cancer Hotspots](https://www.cancerhotspots.org/#/home) MAF file
cd ~/spermseq/data/hotspots
wget http://download.cbioportal.org/cancerhotspots/cancerhotspots.v2.maf.gz
## Keep the protein_coding records (lines containing "#" are dropped first).
## gzip -dc streams the decompressed file; a pager such as zless is not meant for pipelines.
gzip -dc cancerhotspots.v2.maf.gz | grep -v "#" | grep -w "protein_coding" > cancerhotspots.v2.maf
## Save the first non-"#" line (the column header) separately, since the protein_coding filter above
## presumably drops it
gzip -dc cancerhotspots.v2.maf.gz | grep -v "#" | head -n 1 > cancerhotspots.v2.maf.header
## Put the header back on top of the filtered records
cat cancerhotspots.v2.maf.header cancerhotspots.v2.maf > cancerhotspots.v2.proteincoding.maf
## Keep only columns 1, 2, 5, 6, 7 and 38
cut -f 1,2,5,6,7,38 cancerhotspots.v2.proteincoding.maf > cancerhotspots.v2.proteincoding.columns.maf
## Copy to local directory
scp u1240855@kingspeak.chpc.utah.edu:~/spermseq/data/hotspots/cancerhotspots.v2.proteincoding.columns.maf ~/git/spermseq/twinstrand_target_gene_set/data
## Download the liftover file to convert hg19 coordinates from MAF file to hg38
wget https://hgdownload.cse.ucsc.edu/goldenpath/hg19/liftOver/hg19ToHg38.over.chain.gz
## -c writes the decompressed data to stdout. Without it gunzip decompresses in place, and the
## redirect has already created an empty hg19ToHg38.over.chain that gunzip then refuses to overwrite.
gunzip -c hg19ToHg38.over.chain.gz > hg19ToHg38.over.chain
scp u1240855@kingspeak.chpc.utah.edu:~/spermseq/data/hotspots/hg19ToHg38.over.chain ~/git/spermseq/twinstrand_target_gene_set/data
## Exons with 3d hotspot mutations (skipped once final.Rdata exists)
if (!file.exists("~/git/spermseq/script/final.Rdata")) {
  source("~/git/spermseq/script/format_hotspot.R")
  merged_exons_3d_hotspot <- format_3D_hotspot(
    hotspot_file = "~/git/spermseq/twinstrand_target_gene_set/data/3d_hotspots_gao.txt",
    genes = genes,
    merged_exons = merged_exons
  )
}

## Exons with snp/onp/ins/del hotspot mutations, built on top of the 3D-hotspot table
## (skipped once final.Rdata exists)
if (!file.exists("~/git/spermseq/script/final.Rdata")) {
  source("~/git/spermseq/script/format_hotspot.R")
  hotspot_file <- "~/git/spermseq/twinstrand_target_gene_set/data/cancerhotspots.v2.proteincoding.columns.maf"
  merged_exons_hotspot <- format_hotspot(
    hotspot_file = hotspot_file,
    genes = genes,
    merged_exons_3d_hotspot = merged_exons_3d_hotspot
  )
  merged_exons_hotspot
}
## [1] "CDKN2A"
## [1] "multiple_transcripts"
## Add the GC content of every exon, write the final table and cache the workspace.
## The whole step is skipped once final.Rdata exists.
if (!file.exists("~/git/spermseq/script/final.Rdata")) {
  ## Load in necessary packages
  library(BSgenome.Hsapiens.UCSC.hg38)
  library(BSgenome)
  library(GenomicRanges)

  ## GetGC: G+C fraction of each range.
  ##   bsgenome  BSgenome object the sequences are extracted from
  ##   gr        ranges to extract (passed to BSgenome::getSeq)
  ## Returns a numeric vector with one G+C proportion per extracted sequence.
  GetGC <- function(bsgenome, gr) {
    seqs <- BSgenome::getSeq(bsgenome, gr)
    gc_prob <- Biostrings::letterFrequency(x = seqs, letters = "GC", as.prob = TRUE)
    as.numeric(gc_prob)
  }

  ## Exon start/stop coordinates as IRanges
  ranges <- IRanges(
    start = merged_exons_hotspot$start,
    end = merged_exons_hotspot$stop
  )
  ## GRanges object; chromosome names are prefixed with "chr" before the lookup
  gr <- GRanges(
    seqnames = paste0("chr", merged_exons_hotspot$chr),
    ranges = ranges
  )
  ## Get the gc content of each sequence
  merged_exons_hotspot$gc_content <- GetGC(bsgenome = BSgenome.Hsapiens.UCSC.hg38, gr = gr)

  write.table(
    x = merged_exons_hotspot,
    file = "~/git/spermseq/twinstrand_target_gene_set/output/merged_exons_final.txt",
    sep = "\t",
    quote = FALSE,
    row.names = FALSE
  )
  ## Save the data
  save.image("~/git/spermseq/script/final.Rdata")
}

## Restore the final workspace whenever the image is on disk
if (file.exists("~/git/spermseq/script/final.Rdata")) {
  load("~/git/spermseq/script/final.Rdata")
}
## Drop every row that has an NA in any column (default behaviour of na.omit()).
## NOTE(review): the original intent appears to be "rows with NA in the pfam_id column" only -- confirm.
merged_exons_pfam <- na.omit(merged_exons_hotspot)
## Keep exons where hotspot_3d_exon_num > 0 or hotspot_exon_num > 0
merged_exons_pfam_hotspot <- merged_exons_pfam[hotspot_3d_exon_num > 0 | hotspot_exon_num > 0]

## Total panel space of the retained exons
total <- sum(merged_exons_pfam_hotspot$exon_size)

## Recompute the per-gene summary columns on the filtered table
merged_exons_pfam_hotspot <- rbindlist(lapply(
  unique(merged_exons_pfam_hotspot$gene_id),
  function(x, merged_exons_pfam_hotspot) {
    temp <- merged_exons_pfam_hotspot[gene_id == x]
    temp$total_nucleotides <- sum(temp$exon_size)
    temp$total_exons <- nrow(temp)
    ## fixed = TRUE: " | " must be matched literally. As a regex it is an alternation of two
    ## single spaces, which turns the "|" separators into tokens and inflates the count.
    ## NOTE(review): only the first row's transcript_id is inspected ([[1]]) -- confirm this
    ## is intended rather than the union over all rows of the gene.
    temp$transcript_num <- length(unique(
      strsplit(temp$transcript_id, split = " | ", fixed = TRUE)[[1]]
    ))
    temp$avg_exon_size <- round(mean(temp$exon_size), digits = 2)
    temp
  },
  merged_exons_pfam_hotspot = merged_exons_pfam_hotspot
))

## Percentage of the panel space taken up by each gene
merged_exons_pfam_hotspot$proportion_total <- merged_exons_pfam_hotspot$total_nucleotides / total * 100

## Plot the resulting panel
source("~/git/spermseq/script/plot_df.R")
p <- plot_df(df = merged_exons_pfam_hotspot, genes = genes)
p
## Drop every row that has an NA in any column (default behaviour of na.omit()).
## NOTE(review): the original intent appears to be "rows with NA in the pfam_id column" only -- confirm.
merged_exons_pfam <- na.omit(merged_exons_hotspot)

## Total panel space of the retained exons
total <- sum(merged_exons_pfam$exon_size)

## Recompute the per-gene summary columns on the filtered table
merged_exons_pfam <- rbindlist(lapply(
  unique(merged_exons_pfam$gene_id),
  function(x, merged_exons_pfam) {
    temp <- merged_exons_pfam[gene_id == x]
    temp$total_nucleotides <- sum(temp$exon_size)
    temp$total_exons <- nrow(temp)
    ## fixed = TRUE: " | " must be matched literally. As a regex it is an alternation of two
    ## single spaces, which turns the "|" separators into tokens and inflates the count.
    ## NOTE(review): only the first row's transcript_id is inspected ([[1]]) -- confirm this
    ## is intended rather than the union over all rows of the gene.
    temp$transcript_num <- length(unique(
      strsplit(temp$transcript_id, split = " | ", fixed = TRUE)[[1]]
    ))
    temp$avg_exon_size <- round(mean(temp$exon_size), digits = 2)
    temp
  },
  merged_exons_pfam = merged_exons_pfam
))

## Percentage of the panel space taken up by each gene
merged_exons_pfam$proportion_total <- merged_exons_pfam$total_nucleotides / total * 100

## Plot the resulting panel
source("~/git/spermseq/script/plot_df.R")
p <- plot_df(df = merged_exons_pfam, genes = genes)
p
## Re-plot the panel at increasingly strict transcript_proportion cutoffs.
## `final` and `p` are overwritten at every step, so afterwards they hold the last (== 1) subset.

## Cutoff: transcript_proportion >= 0.2
final <- merged_exons_pfam[transcript_proportion >= 0.2]
source("~/git/spermseq/script/plot_df.R")
(p <- plot_df(df = final, genes = genes))

## Cutoff: transcript_proportion >= 0.4
final <- merged_exons_pfam[transcript_proportion >= 0.4]
source("~/git/spermseq/script/plot_df.R")
(p <- plot_df(df = final, genes = genes))

## Cutoff: transcript_proportion >= 0.5
final <- merged_exons_pfam[transcript_proportion >= 0.5]
source("~/git/spermseq/script/plot_df.R")
(p <- plot_df(df = final, genes = genes))

## Cutoff: transcript_proportion >= 0.6
final <- merged_exons_pfam[transcript_proportion >= 0.6]
source("~/git/spermseq/script/plot_df.R")
(p <- plot_df(df = final, genes = genes))

## Cutoff: transcript_proportion >= 0.8
final <- merged_exons_pfam[transcript_proportion >= 0.8]
source("~/git/spermseq/script/plot_df.R")
(p <- plot_df(df = final, genes = genes))

## Cutoff: transcript_proportion exactly 1
final <- merged_exons_pfam[transcript_proportion == 1]
source("~/git/spermseq/script/plot_df.R")
(p <- plot_df(df = final, genes = genes))